suppressPackageStartupMessages(library(tidyverse))
library(patchwork)
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities
Settings
# Input/output locations. Every directory string keeps its trailing slash
# because paths are assembled with paste0() rather than file.path().
data_dir <- '/Volumes/Mitsu_NGS_3/METTL2A/'
wd <- "~/Google Drive/My Drive/Analysis/METTL2A/"
setwd(wd)
figdir <- paste0(
  wd,
  'Figures/Shortread/Stringtie_tximport_DESeq2/Correlation/'
)
tabledir <- paste0(wd, 'Tables/Shortread/')

# Session-wide ggplot2 default: classic theme at base size 7 with the legend
# placed below the panel.
theme_set(
  theme_classic(base_size = 7) +
    theme(legend.position = 'bottom')
)
Read data
# Load the gzip-compressed TSV of DESeq2 results dated 2024-04-17.
# The directory comes from `tabledir` (already defined as
# paste0(wd, 'Tables/Shortread/')) instead of re-spelling it here, so the
# import and the later export share one definition of the table folder.
# The resulting path string is unchanged.
shortread_stringtie_txi_DESeq2 <- read_tsv(
  paste0(
    tabledir,
    'shortread_stringtie_txi_DESeq2_DEG_methylation_2024-04-17.tsv.gz'
  )
)
## Rows: 12974 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): gene_name, gene_id, methylation, isUp, isDown, common_DEGs, gene_t...
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to preview its first rows and column types.
shortread_stringtie_txi_DESeq2
## # A tibble: 12,974 × 27
## gene_name siMETTL2A_baseMean siMETTL2A_log2FoldChange siMETTL2A_lfcSE
## <chr> <dbl> <dbl> <dbl>
## 1 A1CF 4720. -0.917 0.0782
## 2 A4GALT 335. 1.89 0.567
## 3 AAAS 3917. -0.298 0.139
## 4 AACS 4911. -0.146 0.337
## 5 AADAT 15.5 -2.52 1.93
## 6 AAGAB 9386. -0.285 0.145
## 7 AAK1 54.6 0.205 0.632
## 8 AAMDC 1677. 0.763 0.529
## 9 AAMP 20411. 0.0309 0.200
## 10 AAR2 4967. -0.106 0.401
## # ℹ 12,964 more rows
## # ℹ 23 more variables: siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>,
## # siMETTL2A_padj <dbl>, siMETTL2A_G_baseMean <dbl>,
## # siMETTL2A_G_log2FoldChange <dbl>, siMETTL2A_G_lfcSE <dbl>,
## # siMETTL2A_G_stat <dbl>, siMETTL2A_G_pvalue <dbl>, siMETTL2A_G_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, …
# Write the table as a gzip-compressed TSV under `tabledir`.
# NOTE(review): export_tsv() is not defined in this file (presumably it comes
# from myUtilities). The logged path suggests the file is named after the
# object plus the current date -- confirm before renaming the variable.
export_tsv(
  shortread_stringtie_txi_DESeq2,
  outdir = tabledir,
  compression = 'gz'
)
##
## Exported to: ~/Google Drive/My Drive/Analysis/METTL2A/Tables/Shortread/shortread_stringtie_txi_DESeq2_2024-07-30.tsv.gz
## # A tibble: 12,974 × 27
## gene_name siMETTL2A_baseMean siMETTL2A_log2FoldChange siMETTL2A_lfcSE
## <chr> <dbl> <dbl> <dbl>
## 1 A1CF 4720. -0.917 0.0782
## 2 A4GALT 335. 1.89 0.567
## 3 AAAS 3917. -0.298 0.139
## 4 AACS 4911. -0.146 0.337
## 5 AADAT 15.5 -2.52 1.93
## 6 AAGAB 9386. -0.285 0.145
## 7 AAK1 54.6 0.205 0.632
## 8 AAMDC 1677. 0.763 0.529
## 9 AAMP 20411. 0.0309 0.200
## 10 AAR2 4967. -0.106 0.401
## # ℹ 12,964 more rows
## # ℹ 23 more variables: siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>,
## # siMETTL2A_padj <dbl>, siMETTL2A_G_baseMean <dbl>,
## # siMETTL2A_G_log2FoldChange <dbl>, siMETTL2A_G_lfcSE <dbl>,
## # siMETTL2A_G_stat <dbl>, siMETTL2A_G_pvalue <dbl>, siMETTL2A_G_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, …
Plot
# Hex-binned 2D histogram of siMETTL2A_G vs siMETTL2A_I log2 fold changes,
# with bin counts coloured on a log10 scale.
shortread_stringtie_txi_DESeq2_2dhistogram <-
  ggplot(
    shortread_stringtie_txi_DESeq2,
    aes(x = siMETTL2A_G_log2FoldChange, y = siMETTL2A_I_log2FoldChange)
  ) +
  geom_hex(bins = 100) +
  scale_fill_viridis_c(trans = 'log10') +
  # These are scale limits, not coord limits: rows that are NA or fall outside
  # [-10, 10] are dropped before binning, hence the "non-finite values"
  # warning when the plot is drawn.
  xlim(-10, 10) +
  ylim(-10, 10) +
  tune::coord_obs_pred(ratio = 1)

# Save the figure into `figdir`.
# NOTE(review): ggsave_multiple_formats() is defined outside this file; the
# units of width/height and the set of output formats are not visible here.
ggsave_multiple_formats(
  shortread_stringtie_txi_DESeq2_2dhistogram,
  width = 5, height = 5, fontsize = 7, outdir = figdir
)
## Warning: Removed 284 rows containing non-finite values (`stat_binhex()`).
## Removed 284 rows containing non-finite values (`stat_binhex()`).
## Removed 284 rows containing non-finite values (`stat_binhex()`).
## Removed 284 rows containing non-finite values (`stat_binhex()`).
## Removed 284 rows containing non-finite values (`stat_binhex()`).

# Histogram of siMETTL2A_baseMean on a log10 x-axis, with a vertical reference
# line at 100. Values that are not finite after the log10 transform are
# dropped with a warning.
ggplot(shortread_stringtie_txi_DESeq2, aes(x = siMETTL2A_baseMean)) +
  geom_histogram() +
  scale_x_log10() +
  geom_vline(xintercept = 100)
## Warning: Transformation introduced infinite values in continuous x-axis
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 191 rows containing non-finite values (`stat_bin()`).

# Compare the siMETTL2A_G and siMETTL2A_I log2 fold changes.
# NOTE(review): plot_2dhistogram_withcortest() is defined outside this file.
# Judging by its printed output it reports Spearman and Pearson correlation
# tests alongside the plot -- confirm against the helper's source.
plot_2dhistogram_withcortest(
  shortread_stringtie_txi_DESeq2,
  x = siMETTL2A_G_log2FoldChange,
  y = siMETTL2A_I_log2FoldChange
)
## Warning in cor.test.default(x = mf[[1L]], y = mf[[2L]], ...): Cannot compute
## exact p-value with ties
## # A tibble: 2 × 9
## estimate statistic p.value method method_short alternative parameter
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <int>
## 1 0.309 2.36e11 1.01e-278 Spearman's ra… Spearman two.sided NA
## 2 0.369 4.47e 1 0 Pearson's pro… Pearson two.sided 12705
## # ℹ 2 more variables: conf.low <dbl>, conf.high <dbl>
## # A tibble: 2 × 1
## msg
## <chr>
## 1 Spearman: r = 0.31, p < 2.2e-16
## 2 Pearson: r = 0.37, p < 2.2e-16

Number of DEGs
# Number of genes in each `isUp` category (one row per category, column `n`).
count(shortread_stringtie_txi_DESeq2, isUp)
## # A tibble: 4 × 2
## isUp n
## <chr> <int>
## 1 common 1685
## 2 not 7112
## 3 only G 1998
## 4 only I 2179
# Number of genes in each `isDown` category (one row per category, column `n`).
count(shortread_stringtie_txi_DESeq2, isDown)
## # A tibble: 4 × 2
## isDown n
## <chr> <int>
## 1 common 1805
## 2 not 7244
## 3 only G 1838
## 4 only I 2087